LETS SEE THE QUALITY OF THE HIERARCHICAL PARTITION
cdg <- aggregate(as.data.frame(dcon), list(c1), mean)
cdg
plot(cdg[,1], cdg[,7])

#Profiling plots
#Calcula els valor test de la variable Xnum per totes les modalitats del factor P
ValorTestXnum <- function(Xnum,P){
#freq dis of fac
nk <- as.vector(table(P));
n <- sum(nk);
#mitjanes x grups
xk <- tapply(Xnum,P,mean);
#valors test
txk <- (xk-mean(Xnum))/(sd(Xnum)*sqrt((n-nk)/(n*nk)));
#p-values
pxk <- pt(txk,n-1,lower.tail=F);
for(c in 1:length(levels(as.factor(P)))){if (pxk[c]>0.5){pxk[c]<-1-pxk[c]}}
return (pxk)
}
ValorTestXquali <- function(P,Xquali){
taula <- table(P,Xquali);
n <- sum(taula);
pk <- apply(taula,1,sum)/n;
pj <- apply(taula,2,sum)/n;
pf <- taula/(n*pk);
pjm <- matrix(data=pj,nrow=dim(pf)[1],ncol=dim(pf)[2], byrow=TRUE);
dpf <- pf - pjm;
dvt <- sqrt(((1-pk)/(n*pk))%*%t(pj*(1-pj)));
#i hi ha divisions iguals a 0 dona NA i no funciona
zkj <- dpf
zkj[dpf!=0]<-dpf[dpf!=0]/dvt[dpf!=0];
pzkj <- pnorm(zkj,lower.tail=F);
for(c in 1:length(levels(as.factor(P)))){for (s in 1:length(levels(Xquali))){if (pzkj[c,s]> 0.5){pzkj[c,s]<-1- pzkj[c,s]}}}
return (list(rowpf=pf,vtest=zkj,pval=pzkj))
}
#source("file")
#dades contain the dataset
dades <- dd[, setdiff(names(dd),
c("fnlwgt", "income", "cluster"))]
#dades<-dd[filtro,]
#dades<-df
K<-dim(dades)[2]
par(ask=TRUE)
#P must contain the class variable
#P<-dd[,3]
P<-c2
#P<-dd[,18]
nameP<-"classe"
#P<-df[,33]
nc<-length(levels(factor(P)))
nc
## [1] 3
pvalk <- matrix(data=0,nrow=nc,ncol=K, dimnames=list(levels(P),names(dades)))
nameP<-"Class"
n<-dim(dades)[1]
for(k in 1:K){
if (is.numeric(dades[,k])){
print(paste("Anàlisi per classes de la Variable:", names(dades)[k]))
boxplot(dades[,k]~P, main=paste("Boxplot of", names(dades)[k], "vs", nameP ), horizontal=TRUE)
barplot(tapply(dades[[k]], P, mean),main=paste("Means of", names(dades)[k], "by", nameP ))
abline(h=mean(dades[[k]]))
legend(0,mean(dades[[k]]),"global mean",bty="n")
print("Estadístics per groups:")
for(s in levels(as.factor(P))) {print(summary(dades[P==s,k]))}
o<-oneway.test(dades[,k]~P)
print(paste("p-valueANOVA:", o$p.value))
kw<-kruskal.test(dades[,k]~P)
print(paste("p-value Kruskal-Wallis:", kw$p.value))
pvalk[,k]<-ValorTestXnum(dades[,k], P)
print("p-values ValorsTest: ")
print(pvalk[,k])
}else{
if(class(dd[,k])=="Date"){
print(summary(dd[,k]))
print(sd(dd[,k]))
#decide breaks: weeks, months, quarters...
hist(dd[,k],breaks="weeks")
}else{
#qualitatives
print(paste("Variable", names(dades)[k]))
table<-table(P,dades[,k])
# print("Cross-table")
# print(table)
rowperc<-prop.table(table,1)
colperc<-prop.table(table,2)
# print("Distribucions condicionades a files")
# print(rowperc)
#ojo porque si la variable es true o false la identifica amb el tipus Logical i
#aquest no te levels, por tanto, coertion preventiva
dades[,k]<-as.factor(dades[,k])
marg <- table(as.factor(P))/n
print(append("Categories=",levels(as.factor(dades[,k]))))
#from next plots, select one of them according to your practical case
#with legend
plot(marg,type="l",ylim=c(0,1),main=paste("Prop. of classes by",names(dades)[k]))
paleta<-rainbow(length(levels(dades[,k])))
for(c in 1:length(levels(dades[,k]))){lines(colperc[,c],col=paleta[c]) }
legend("topright", levels(dades[,k]), col=paleta, lty=2, cex=0.6)
#condicionades a classes
print(append("Categories=",levels(dades[,k])))
#with legend
plot(marg,type="n",ylim=c(0,1),main=paste("Prop. of classes by",names(dades)[k]))
paleta<-rainbow(length(levels(dades[,k])))
for(c in 1:length(levels(dades[,k]))){lines(rowperc[,c],col=paleta[c]) }
legend("topright", levels(dades[,k]), col=paleta, lty=2, cex=0.6)
#amb variable en eix d'abcisses
marg <-table(dades[,k])/n
print(append("Categories=",levels(dades[,k])))
paleta<-rainbow(length(levels(as.factor(P))))
#with legend
plot(marg,type="l",ylim=c(0,1),main=paste("Prop. of classes by",names(dades)[k]), las=3)
for(c in 1:length(levels(as.factor(P)))){lines(rowperc[c,],col=paleta[c])}
legend("topright", levels(as.factor(P)), col=paleta, lty=2, cex=0.6)
#condicionades a columna
#with legend
plot(marg,type="n",ylim=c(0,1),main=paste("Prop. of classes by",names(dades)[k]), las=3)
for(c in 1:length(levels(as.factor(P)))){lines(colperc[c,],col=paleta[c])}
legend("topright", levels(as.factor(P)), col=paleta, lty=2, cex=0.6)
table<-table(dades[,k],P)
print("Cross Table:")
print(table)
print("Distribucions condicionades a columnes:")
print(colperc)
#diagrames de barres apilades
paleta<-rainbow(length(levels(dades[,k])))
barplot(table(dades[,k], as.factor(P)), beside=FALSE,col=paleta )
legend("topright",levels(as.factor(dades[,k])),pch=1,cex=0.5, col=paleta)
#diagrames de barres adosades
barplot(table(dades[,k], as.factor(P)), beside=TRUE,col=paleta)
legend("topright",levels(as.factor(dades[,k])),pch=1,cex=0.5, col=paleta)
print("Test Chi quadrat: ")
print(chisq.test(dades[,k], as.factor(P)))
print("valorsTest:")
print( ValorTestXquali(P,dades[,k]))
#calcular els pvalues de les quali
}
}
}#endfor
## [1] "Anàlisi per classes de la Variable: age"


## [1] "Estadístics per groups:"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18.00 35.00 43.00 44.18 52.00 90.00
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 17.00 22.00 27.00 30.64 36.00 90.00
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 17.00 25.00 36.00 37.62 47.00 90.00
## [1] "p-valueANOVA: 3.37964367791504e-190"
## [1] "p-value Kruskal-Wallis: 9.67398091244748e-189"
## [1] "p-values ValorsTest: "
## [1] 9.361532e-115 0.000000e+00 4.265215e-04
## [1] "Variable workclass"
## [1] "Categories=" "Fed" "Loc" "NoPay" "Priv"
## [6] "SelfI" "SelfN" "State"

## [1] "Categories=" "Fed" "Loc" "NoPay" "Priv"
## [6] "SelfI" "SelfN" "State"

## [1] "Categories=" "Fed" "Loc" "NoPay" "Priv"
## [6] "SelfI" "SelfN" "State"


## [1] "Cross Table:"
## P
## 1 2 3
## Fed 71 30 60
## Loc 134 49 169
## NoPay 1 1 1
## Priv 1304 1009 1405
## SelfI 136 17 24
## SelfN 234 57 96
## State 77 58 67
## [1] "Distribucions condicionades a columnes:"
##
## P Fed Loc NoPay Priv SelfI SelfN State
## 1 0.4409938 0.3806818 0.3333333 0.3507262 0.7683616 0.6046512 0.3811881
## 2 0.1863354 0.1392045 0.3333333 0.2713825 0.0960452 0.1472868 0.2871287
## 3 0.3726708 0.4801136 0.3333333 0.3778913 0.1355932 0.2480620 0.3316832

## [1] "Test Chi quadrat: "
## Warning in chisq.test(dades[, k], as.factor(P)): L'aproximació Chi-quadrat pot
## ser incorrecta

##
## Pearson's Chi-squared test
##
## data: dades[, k] and as.factor(P)
## X-squared = 243.13, df = 12, p-value < 2.2e-16
##
## [1] "valorsTest:"
## $rowpf
## Xquali
## P Fed Loc NoPay Priv SelfI
## 1 0.0362800204 0.0684721513 0.0005109862 0.6663260092 0.0694941237
## 2 0.0245700246 0.0401310401 0.0008190008 0.8263718264 0.0139230139
## 3 0.0329308452 0.0927552141 0.0005488474 0.7711306257 0.0131723381
## Xquali
## P SelfN State
## 1 0.1195707716 0.0393459377
## 2 0.0466830467 0.0475020475
## 3 0.0526893524 0.0367727772
##
## $vtest
## Xquali
## P Fed Loc NoPay Priv SelfI SelfN
## 1 1.3106026 -0.4273346 -0.2061301 -10.0353860 10.4624576 8.9487790
## 2 -1.7372278 -4.7557334 0.3594635 7.6191799 -4.6714582 -4.6201420
## 3 0.2216598 4.6787160 -0.1118419 3.3757449 -6.4402209 -4.9509571
## Xquali
## P State
## 1 -0.3035708
## 2 1.4497802
## 3 -0.9863264
##
## $pval
## Xquali
## P Fed Loc NoPay Priv SelfI
## 1 9.499603e-02 3.345678e-01 4.183446e-01 0.000000e+00 6.423963e-26
## 2 4.117348e-02 9.886364e-07 3.596242e-01 1.276463e-14 1.495345e-06
## 3 4.122894e-01 1.443385e-06 4.554744e-01 3.680807e-04 5.964984e-11
## Xquali
## P SelfN State
## 1 1.797181e-19 3.807275e-01
## 2 1.917388e-06 7.355991e-02
## 3 3.692469e-07 1.619865e-01
##
## [1] "Anàlisi per classes de la Variable: edu_num"


## [1] "Estadístics per groups:"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 9.00 10.00 10.27 13.00 16.00
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 9.000 10.000 9.595 11.000 16.000
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.00 9.00 10.00 10.14 12.00 16.00
## [1] "p-valueANOVA: 1.231187841333e-11"
## [1] "p-value Kruskal-Wallis: 1.0090777949887e-08"
## [1] "p-values ValorsTest: "
## [1] 8.904730e-07 2.289280e-13 5.182494e-02
## [1] "Variable marital"
## [1] "Categories=" "Div" "Married" "NevMarr" "Sep"
## [6] "Widow"

## [1] "Categories=" "Div" "Married" "NevMarr" "Sep"
## [6] "Widow"

## [1] "Categories=" "Div" "Married" "NevMarr" "Sep"
## [6] "Widow"


## [1] "Cross Table:"
## P
## 1 2 3
## Div 0 115 576
## Married 1957 16 237
## NevMarr 0 999 703
## Sep 0 78 158
## Widow 0 13 148
## [1] "Distribucions condicionades a columnes:"
##
## P Div Married NevMarr Sep Widow
## 1 0.000000000 0.885520362 0.000000000 0.000000000 0.000000000
## 2 0.166425470 0.007239819 0.586956522 0.330508475 0.080745342
## 3 0.833574530 0.107239819 0.413043478 0.669491525 0.919254658


## [1] "Test Chi quadrat: "
##
## Pearson's Chi-squared test
##
## data: dades[, k] and as.factor(P)
## X-squared = 4865.3, df = 8, p-value < 2.2e-16
##
## [1] "valorsTest:"
## $rowpf
## Xquali
## P Div Married NevMarr Sep Widow
## 1 0.00000000 1.00000000 0.00000000 0.00000000 0.00000000
## 2 0.09418509 0.01310401 0.81818182 0.06388206 0.01064701
## 3 0.31613611 0.13007684 0.38583974 0.08671789 0.08122942
##
## $vtest
## Xquali
## P Div Married NevMarr Sep Widow
## 1 -22.708070 63.714078 -40.736517 -12.621168 -10.343430
## 2 -5.126216 -34.711880 40.527415 3.161785 -4.907284
## 3 27.605084 -33.628046 5.134320 9.977096 14.870249
##
## $pval
## Xquali
## P Div Married NevMarr Sep Widow
## 1 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 2 1.478117e-07 0.000000e+00 0.000000e+00 7.840272e-04 4.617323e-07
## 3 4.834171e-168 0.000000e+00 1.415828e-07 9.600218e-24 2.571091e-50
##
## [1] "Variable occupation"
## [1] "Categories=" "AdminCler" "Army" "CraftRep" "ExecMan"
## [6] "FarmFish" "HandlCl" "House" "MachOp" "Other"
## [11] "Prof" "ProtServ" "Sales" "Tech" "Trans"

## [1] "Categories=" "AdminCler" "Army" "CraftRep" "ExecMan"
## [6] "FarmFish" "HandlCl" "House" "MachOp" "Other"
## [11] "Prof" "ProtServ" "Sales" "Tech" "Trans"

## [1] "Categories=" "AdminCler" "Army" "CraftRep" "ExecMan"
## [6] "FarmFish" "HandlCl" "House" "MachOp" "Other"
## [11] "Prof" "ProtServ" "Sales" "Tech" "Trans"


## [1] "Cross Table:"
## P
## 1 2 3
## AdminCler 94 106 364
## Army 0 1 0
## CraftRep 378 123 122
## ExecMan 335 73 223
## FarmFish 83 40 23
## HandlCl 63 124 38
## House 0 8 16
## MachOp 142 82 88
## Other 69 190 261
## Prof 317 232 368
## ProtServ 66 19 20
## Sales 227 127 202
## Tech 46 37 57
## Trans 137 59 40
## [1] "Distribucions condicionades a columnes:"
##
## P AdminCler Army CraftRep ExecMan FarmFish HandlCl House
## 1 0.1666667 0.0000000 0.6067416 0.5309033 0.5684932 0.2800000 0.0000000
## 2 0.1879433 1.0000000 0.1974318 0.1156894 0.2739726 0.5511111 0.3333333
## 3 0.6453901 0.0000000 0.1958266 0.3534073 0.1575342 0.1688889 0.6666667
##
## P MachOp Other Prof ProtServ Sales Tech Trans
## 1 0.4551282 0.1326923 0.3456925 0.6285714 0.4082734 0.3285714 0.5805085
## 2 0.2628205 0.3653846 0.2529989 0.1809524 0.2284173 0.2642857 0.2500000
## 3 0.2820513 0.5019231 0.4013086 0.1904762 0.3633094 0.4071429 0.1694915

## [1] "Test Chi quadrat: "
## Warning in chisq.test(dades[, k], as.factor(P)): L'aproximació Chi-quadrat pot
## ser incorrecta

##
## Pearson's Chi-squared test
##
## data: dades[, k] and as.factor(P)
## X-squared = 811.03, df = 26, p-value < 2.2e-16
##
## [1] "valorsTest:"
## $rowpf
## Xquali
## P AdminCler Army CraftRep ExecMan FarmFish
## 1 0.0480327031 0.0000000000 0.1931527849 0.1711803781 0.0424118549
## 2 0.0868140868 0.0008190008 0.1007371007 0.0597870598 0.0327600328
## 3 0.1997804610 0.0000000000 0.0669593853 0.1223929748 0.0126234907
## Xquali
## P HandlCl House MachOp Other Prof
## 1 0.0321921308 0.0000000000 0.0725600409 0.0352580480 0.1619826265
## 2 0.1015561016 0.0065520066 0.0671580672 0.1556101556 0.1900081900
## 3 0.0208562020 0.0087815587 0.0482985730 0.1432491767 0.2019758507
## Xquali
## P ProtServ Sales Tech Trans
## 1 0.0337250894 0.1159938682 0.0235053654 0.0700051099
## 2 0.0155610156 0.1040131040 0.0303030303 0.0483210483
## 3 0.0109769484 0.1108671789 0.0312843030 0.0219538968
##
## $vtest
## Xquali
## P AdminCler Army CraftRep ExecMan FarmFish
## 1 -11.60967051 -0.80202493 11.77042783 7.68098370 4.44976345
## 2 -3.30161976 1.75943833 -2.90412499 -8.03843303 0.84986917
## 3 14.72104468 -0.75725243 -9.34433831 -0.61380766 -5.27131555
## Xquali
## P HandlCl House MachOp Other Prof
## 1 -3.50346981 -3.93817376 2.38190151 -12.76969794 -3.13827716
## 2 10.96543617 1.01885953 0.79064925 6.79546034 0.68632933
## 3 -6.23562355 3.08432096 -3.12136252 6.88401993 2.56996140
## Xquali
## P ProtServ Sales Tech Trans
## 1 5.03256379 0.86469134 -1.54494024 6.09803249
## 2 -1.52465370 -0.91884304 0.56110210 0.21247452
## 3 -3.74266723 -0.05668115 1.06589225 -6.37389231
##
## $pval
## Xquali
## P AdminCler Army CraftRep ExecMan FarmFish
## 1 0.000000e+00 2.112693e-01 2.772131e-32 7.893579e-15 4.298246e-06
## 2 4.806414e-04 3.925154e-02 1.841406e-03 4.440892e-16 1.976989e-01
## 3 2.361536e-49 2.244493e-01 0.000000e+00 2.696712e-01 6.772468e-08
## Xquali
## P HandlCl House MachOp Other Prof
## 1 2.296194e-04 4.105207e-05 8.611752e-03 0.000000e+00 8.497205e-04
## 2 2.801487e-28 1.541348e-01 2.145744e-01 5.398361e-12 2.462527e-01
## 3 2.249910e-10 1.020087e-03 9.000814e-04 2.909336e-12 5.085492e-03
## Xquali
## P ProtServ Sales Tech Trans
## 1 2.419817e-07 1.936041e-01 6.118036e-02 5.369096e-10
## 2 6.367275e-02 1.790888e-01 2.873640e-01 4.158684e-01
## 3 9.103864e-05 4.773996e-01 1.432362e-01 9.214496e-11
##
## [1] "Variable relationship"
## [1] "Categories=" "Husband" "Not-in-family" "Other-relative"
## [5] "Own-child" "Unmarried" "Wife"

## [1] "Categories=" "Husband" "Not-in-family" "Other-relative"
## [5] "Own-child" "Unmarried" "Wife"

## [1] "Categories=" "Husband" "Not-in-family" "Other-relative"
## [5] "Own-child" "Unmarried" "Wife"


## [1] "Cross Table:"
## P
## 1 2 3
## Husband 1948 0 0
## Not-in-family 0 558 767
## Other-relative 6 149 17
## Own-child 3 443 363
## Unmarried 0 70 446
## Wife 0 1 229
## [1] "Distribucions condicionades a columnes:"
##
## P Husband Not-in-family Other-relative Own-child Unmarried
## 1 1.000000000 0.000000000 0.034883721 0.003708282 0.000000000
## 2 0.000000000 0.421132075 0.866279070 0.547589617 0.135658915
## 3 0.000000000 0.578867925 0.098837209 0.448702101 0.864341085
##
## P Wife
## 1 0.000000000
## 2 0.004347826
## 3 0.995652174


## [1] "Test Chi quadrat: "
##
## Pearson's Chi-squared test
##
## data: dades[, k] and as.factor(P)
## X-squared = 5854.3, df = 10, p-value < 2.2e-16
##
## [1] "valorsTest:"
## $rowpf
## Xquali
## P Husband Not-in-family Other-relative Own-child Unmarried
## 1 0.9954011242 0.0000000000 0.0030659172 0.0015329586 0.0000000000
## 2 0.0000000000 0.4570024570 0.1220311220 0.3628173628 0.0573300573
## 3 0.0000000000 0.4209659715 0.0093304061 0.1992316136 0.2447859495
## Xquali
## P Wife
## 1 0.0000000000
## 2 0.0008190008
## 3 0.1256860593
##
## $vtest
## Xquali
## P Husband Not-in-family Other-relative Own-child Unmarried Wife
## 1 70.443800 -34.049341 -9.749191 -24.678035 -19.236298 -12.451858
## 2 -32.111205 17.486165 19.325686 21.939406 -6.060311 -8.668763
## 3 -42.774452 18.921061 -7.364643 5.441990 24.918090 20.366270
##
## $pval
## Xquali
## P Husband Not-in-family Other-relative Own-child Unmarried
## 1 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## 2 0.000000e+00 9.132125e-69 1.633078e-83 5.465957e-107 6.792915e-10
## 3 0.000000e+00 3.824933e-80 8.881784e-14 2.634438e-08 2.368874e-137
## Xquali
## P Wife
## 1 0.000000e+00
## 2 0.000000e+00
## 3 1.665531e-92
##
## [1] "Variable race"
## [1] "Categories=" "Amer-Indian-Eskimo" "Asian-Pac-Islander"
## [4] "Black" "Other" "White"

## [1] "Categories=" "Amer-Indian-Eskimo" "Asian-Pac-Islander"
## [4] "Black" "Other" "White"

## [1] "Categories=" "Amer-Indian-Eskimo" "Asian-Pac-Islander"
## [4] "Black" "Other" "White"


## [1] "Cross Table:"
## P
## 1 2 3
## Amer-Indian-Eskimo 16 9 24
## Asian-Pac-Islander 63 70 23
## Black 87 153 206
## Other 10 21 16
## White 1781 968 1553
## [1] "Distribucions condicionades a columnes:"
##
## P Amer-Indian-Eskimo Asian-Pac-Islander Black Other White
## 1 0.3265306 0.4038462 0.1950673 0.2127660 0.4139935
## 2 0.1836735 0.4487179 0.3430493 0.4468085 0.2250116
## 3 0.4897959 0.1474359 0.4618834 0.3404255 0.3609949


## [1] "Test Chi quadrat: "
##
## Pearson's Chi-squared test
##
## data: dades[, k] and as.factor(P)
## X-squared = 147.75, df = 8, p-value < 2.2e-16
##
## [1] "valorsTest:"
## $rowpf
## Xquali
## P Amer-Indian-Eskimo Asian-Pac-Islander Black Other White
## 1 0.008175779 0.032192131 0.044455800 0.005109862 0.910066428
## 2 0.007371007 0.057330057 0.125307125 0.017199017 0.792792793
## 3 0.013172338 0.012623491 0.113062569 0.008781559 0.852360044
##
## $vtest
## Xquali
## P Amer-Indian-Eskimo Asian-Pac-Islander Black Other White
## 1 -0.9349750 0.3235966 -8.9016856 -2.5210857 8.1264236
## 2 -0.9910740 6.0408825 5.0915893 3.2484873 -7.8406934
## 3 1.8329003 -5.7207371 4.4823472 -0.3431371 -1.2420612
##
## $pval
## Xquali
## P Amer-Indian-Eskimo Asian-Pac-Islander Black Other
## 1 1.749006e-01 3.731217e-01 0.000000e+00 5.849668e-03
## 2 1.608247e-01 7.663678e-10 1.775373e-07 5.801019e-04
## 3 3.340870e-02 5.303145e-09 3.691325e-06 3.657477e-01
## Xquali
## P White
## 1 2.210710e-16
## 2 2.220446e-15
## 3 1.071070e-01
##
## [1] "Variable sex"
## [1] "Categories=" "Female" "Male"

## [1] "Categories=" "Female" "Male"

## [1] "Categories=" "Female" "Male"


## [1] "Cross Table:"
## P
## 1 2 3
## Female 2 159 1512
## Male 1955 1062 310
## [1] "Distribucions condicionades a columnes:"
##
## P Female Male
## 1 0.001195457 0.587616471
## 2 0.095038852 0.319206492
## 3 0.903765690 0.093177036


## [1] "Test Chi quadrat: "
##
## Pearson's Chi-squared test
##
## data: dades[, k] and as.factor(P)
## X-squared = 3214.4, df = 2, p-value < 2.2e-16
##
## [1] "valorsTest:"
## $rowpf
## Xquali
## P Female Male
## 1 0.001021972 0.998978028
## 2 0.130221130 0.869778870
## 3 0.829857300 0.170142700
##
## $vtest
## Xquali
## P Female Male
## 1 -40.08875 40.08875
## 2 -17.40949 17.40949
## 3 56.19643 -56.19643
##
## $pval
## Xquali
## P Female Male
## 1 0.000000e+00 0.000000e+00
## 2 0.000000e+00 3.495277e-68
## 3 0.000000e+00 0.000000e+00
##
## [1] "Anàlisi per classes de la Variable: cap_gain"


## [1] "Estadístics per groups:"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 0 1923 0 99999
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 0.0 0.0 297.1 0.0 27828.0
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 0.0 0.0 544.8 0.0 99999.0
## [1] "p-valueANOVA: 2.01817862825246e-10"
## [1] "p-value Kruskal-Wallis: 1.64878128351474e-19"
## [1] "p-values ValorsTest: "
## [1] 6.363026e-13 2.332985e-05 1.746724e-04
## [1] "Anàlisi per classes de la Variable: cap_loss"


## [1] "Estadístics per groups:"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 0.0 0.0 138.2 0.0 2603.0
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 0.00 0.00 54.33 0.00 2444.00
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 0.00 0.00 65.66 0.00 4356.00
## [1] "p-valueANOVA: 9.61265012137871e-09"
## [1] "p-value Kruskal-Wallis: 2.10298545751866e-09"
## [1] "p-values ValorsTest: "
## [1] 6.588531e-11 1.635098e-04 4.530013e-04
## [1] "Anàlisi per classes de la Variable: hours_week"


## [1] "Estadístics per groups:"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3 40 40 44 50 99
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.00 35.00 40.00 38.31 40.00 99.00
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.00 35.00 40.00 37.43 40.00 99.00
## [1] "p-valueANOVA: 1.14181725805632e-66"
## [1] "p-value Kruskal-Wallis: 6.19411685035705e-85"
## [1] "p-values ValorsTest: "
## [1] 6.124643e-64 5.816605e-10 0.000000e+00
## [1] "Variable native_country"
## [1] "Categories=" "Other" "USA"

## [1] "Categories=" "Other" "USA"

## [1] "Categories=" "Other" "USA"


## [1] "Cross Table:"
## P
## 1 2 3
## Other 190 254 46
## USA 1767 967 1776
## [1] "Distribucions condicionades a columnes:"
##
## P Other USA
## 1 0.38775510 0.39179601
## 2 0.51836735 0.21441242
## 3 0.09387755 0.39379157


## [1] "Test Chi quadrat: "
##
## Pearson's Chi-squared test
##
## data: dades[, k] and as.factor(P)
## X-squared = 276.33, df = 2, p-value < 2.2e-16
##
## [1] "valorsTest:"
## $rowpf
## Xquali
## P Other USA
## 1 0.09708738 0.90291262
## 2 0.20802621 0.79197379
## 3 0.02524698 0.97475302
##
## $vtest
## Xquali
## P Other USA
## 1 -0.1740619 0.1740619
## 2 14.8742209 -14.8742209
## 3 -13.1013773 13.1013773
##
## $pval
## Xquali
## P Other USA
## 1 4.309084e-01 4.309084e-01
## 2 2.422967e-50 0.000000e+00
## 3 0.000000e+00 1.616682e-39
##
## [1] "Anàlisi per classes de la Variable: income_integer"


## [1] "Estadístics per groups:"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 31450 43950 49230 48955 55000 80040
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 28120 38690 40833 41777 44700 59916
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 31500 40250 44090 44486 47950 72200
## [1] "p-valueANOVA: 2.33607932990687e-236"
## [1] "p-value Kruskal-Wallis: 8.99199691151354e-225"
## [1] "p-values ValorsTest: "
## [1] 5.076755e-183 0.000000e+00 0.000000e+00
#descriptors de les classes més significatius. Afegir info qualits
for (c in 1:length(levels(as.factor(P)))) {
if(!is.na(levels(as.factor(P))[c])){
print(paste("P.values per class:",levels(as.factor(P))[c]));
print(sort(pvalk[c,]), digits=3)
}
}
## [1] "P.values per class: 1"
## workclass marital occupation relationship race
## 0.00e+00 0.00e+00 0.00e+00 0.00e+00 0.00e+00
## sex native_country income_integer age hours_week
## 0.00e+00 0.00e+00 5.08e-183 9.36e-115 6.12e-64
## cap_gain cap_loss edu_num
## 6.36e-13 6.59e-11 8.90e-07
## [1] "P.values per class: 2"
## age workclass marital occupation relationship
## 0.00e+00 0.00e+00 0.00e+00 0.00e+00 0.00e+00
## race sex native_country income_integer edu_num
## 0.00e+00 0.00e+00 0.00e+00 0.00e+00 2.29e-13
## hours_week cap_gain cap_loss
## 5.82e-10 2.33e-05 1.64e-04
## [1] "P.values per class: 3"
## workclass marital occupation relationship race
## 0.000000 0.000000 0.000000 0.000000 0.000000
## sex hours_week native_country income_integer cap_gain
## 0.000000 0.000000 0.000000 0.000000 0.000175
## age cap_loss edu_num
## 0.000427 0.000453 0.051825